In [1]:
import pymysql
In [2]:
db = pymysql.connect(
"db.fastcamp.us",
"root",
"dkstncks",
"sakila",
charset='utf8',
)
In [3]:
rental_df = pd.read_sql("SELECT * FROM rental;", db)
inventory_df = pd.read_sql("SELECT * FROM inventory;", db)
film_df = pd.read_sql("SELECT * FROM film;", db)
film_category_df = pd.read_sql("SELECT * FROM film_category;", db)
category_df = pd.read_sql("SELECT * FROM category;", db)
In [4]:
rental_df.head(1)
Out[4]:
In [5]:
inventory_df.head(1)
Out[5]:
In [6]:
film_df.head(1)
Out[6]:
In [7]:
film_category_df.head(1)
Out[7]:
In [8]:
category_df.head(1)
Out[8]:
In [25]:
SQL_QUERY = """
SELECT
c.category_id category_id,
c.name category_name,
COUNT(*) rentals_per_category
FROM
rental r
JOIN inventory i ON r.inventory_id = i.inventory_id
JOIN film f ON f.film_id = i.film_id
JOIN film_category fc ON fc.film_id = f.film_id
JOIN category c ON fc.category_id = c.category_id
WHERE
c.name IN ("Family", "Sports", "Comedy")
GROUP BY
category_id
ORDER BY rentals_per_category DESC
;
"""
pd.read_sql(SQL_QUERY, db)
Out[25]:
In [ ]:
In [29]:
payment_df = pd.read_sql("SELECT * FROM payment;", db)
In [26]:
film_df.head(1)
Out[26]:
In [30]:
payment_df.head(1)
Out[30]:
In [31]:
inventory_df.head(1)
Out[31]:
In [32]:
rental_df.head(1)
Out[32]:
In [43]:
SQL_QUERY = """
SELECT
i.store_id store_id,
f.rating rating,
SUM(p.amount) total_revenue
FROM
payment p
JOIN rental r ON p.rental_id = r.rental_id
JOIN inventory i ON r.inventory_id = i.inventory_id
JOIN film f ON i.film_id = f.film_id
WHERE
i.store_id = 1
AND f.rating IN ("PG-13", "R")
GROUP BY
store_id,
rating
;
"""
pd.read_sql(SQL_QUERY, db)
Out[43]:
In [ ]:
In [49]:
# 1. 영화별 매출 - rental, film, inventory
REVENUE_PER_FILM_SQL_QUERY = """
SELECT
f.film_id film_id,
COUNT(*) * f.rental_rate revenue
FROM
rental r
JOIN inventory i ON r.inventory_id = i.inventory_id
JOIN film f ON i.film_id = f.film_id
GROUP BY
film_id
;
"""
pd.read_sql(REVENUE_PER_FILM_SQL_QUERY, db)
# 2. 배우별 매출 - actor, film_actor
SQL_QUERY = """
SELECT
a.actor_id,
a.last_name last_name,
a.first_name first_name,
SUM(rpf.revenue) revenue_per_actor
FROM ({REVENUE_PER_FILM_SQL_QUERY}) AS rpf
JOIN film_actor fa ON rpf.film_id = fa.film_id
JOIN actor a ON fa.actor_id = a.actor_id
GROUP BY
actor_id
ORDER BY revenue_per_actor DESC
;
""".format(
REVENUE_PER_FILM_SQL_QUERY=REVENUE_PER_FILM_SQL_QUERY.replace(";",""),
)
pd.read_sql(SQL_QUERY, db)
Out[49]: